---
title: "Russian Businessperson Voting"
subtitle: "Data Cleaning"
author: "Noah Dasanaike"
output: pdf_document
---

```{r setup, include=FALSE}
# Hide chunk source code in the rendered document unless a chunk overrides it.
knitr::opts_chunk$set(echo = FALSE)
# Data wrangling, CSV reading, string helpers and plotting used throughout.
library(tidyverse)
# read_xlsx() for the district spreadsheet.
library(readxl)
# export_summs() regression tables.
library(jtools)
# NOTE(review): presumably the table backend export_summs() needs -- confirm.
library(huxtable)
# clean_names() for snake_case column names.
library(janitor)
# loadfonts() is called before the final figure.
library(extrafont)
```

```{r}
# Restore the saved R objects stored in the project's .Rdata file.
# NOTE(review): the chunks that build `results`, `elite_company_25`,
# `elite_company_500`, `budgetary_committee`, `smd_share`, `chairmen`,
# `comm_chairmen` and `party_leader` are wrapped in an HTML comment in this
# document, so the analysis chunks presumably rely on this file to supply
# those objects -- confirm.
load("dasanaike_duma_cohesion.Rdata")
```

<!--
clean deputy names 

```{r}
library(janitor)

# District spreadsheet -> lookup table keyed by lower-cased deputy name.
#   1. snake_case the column names;
#   2. put a space wherever a lower-case letter runs straight into a
#      capitalised word, then strip carriage returns / line feeds;
#   3. keep rows 1, 3, 5, ... only;
#   4. expose the deputy column as lower-case `name`.
smd <- read_xlsx("smd.xlsx") %>%
  clean_names() %>%
  mutate(
    deputy = gsub("([[:lower:]])([[:upper:]][[:lower:]])", "\\1 \\2", deputy),
    deputy = str_replace_all(deputy, "[\r\n]", "")
  ) %>%
  slice(seq(1, n(), 2)) %>%
  rename(name = deputy) %>%
  mutate(name = tolower(name))
```

function for reading in data

```{r}
# Build a long table of deputies with their dissent score, party and company
# position(s).
#
# Reads "duma_cleaned.csv" (name, party) and "voting_discord_revise.csv"
# (name, total_votes, discord, percent) from the working directory in addition
# to `read_data`.
#
# Args:
#   read_data: path of a CSV with a `name` column and the indicator columns
#     `director`, `deputy_director`, `board_member` and `executive`.
#
# Returns:
#   A tibble with columns `name`, `discord` (100 * `percent`), `party`
#   (English label; NA for any party not listed in the case_when() below),
#   `position` and `value`. It holds one row per deputy and indicator equal
#   to 1; deputies with all four indicators at 0 get one row with position
#   "none".
merge_results <- function(read_data) {
  # Deputies and their parties, keyed by lower-cased name without line breaks.
  parties <- read_csv("duma_cleaned.csv") %>%
    mutate(name = str_replace_all(tolower(name), "[\r\n]", "")) %>%
    select(name, party)

  # Deputies who are directors, deputy directors, board members or executives.
  companies <- read_csv(read_data) %>%
    mutate(name = str_replace_all(tolower(name), "[\r\n]", ""))

  # Dissent per deputy, rescaled to percent, then matched to a party. The raw
  # `percent` column is dropped here as well: left in place it would be pivoted
  # into a "position" below and survive the value == 1 filter for any deputy
  # whose share is exactly 1.
  voting <- read_csv("voting_discord_revise.csv") %>%
    mutate(name = tolower(name), disc_perc = 100 * percent) %>%
    select(-c(total_votes, discord, percent)) %>%
    rename(discord = disc_perc) %>%
    merge(parties, by = "name", all = TRUE) %>%
    drop_na(party)

  # Full outer join of company ties and voting data.
  combined <- merge(companies, voting, by = "name", all = TRUE)

  # Deputies without a company tie have NA indicators; treat them as 0.
  # NOTE(review): this blanket fill also writes 0 into `discord` and `party`
  # for people who appear only in the company file -- confirm that is intended.
  combined[is.na(combined)] <- 0

  # NOTE(review): only indicators exactly equal to 1 are kept below; if the
  # company files store counts rather than 0/1 flags, larger values are
  # dropped -- confirm against the data.
  combined %>%
    mutate(
      none = ifelse(director == 0 & deputy_director == 0 &
                      board_member == 0 & executive == 0, 1, 0)
    ) %>%
    pivot_longer(cols = -c(name, discord, party), names_to = "position") %>%
    filter(value == 1) %>%
    mutate(
      # Collapse every Communist Party spelling onto one label first.
      party = ifelse(grepl("КОММУНИСТИЧЕСКАЯ ПАРТИЯ", party),
                     "КОММУНИСТИЧЕСКАЯ ПАРТИЯ", party),
      party = case_when(party == "ЛДПР" ~ "Liberal Democratic Party",
                        party == "ЕДИНАЯ РОССИЯ" ~ "United Russia",
                        party == "КОММУНИСТИЧЕСКАЯ ПАРТИЯ" ~ "Communist Party",
                        party == "СПРАВЕДЛИВАЯ РОССИЯ" ~ "A Just Russia")
    ) %>%
    as_tibble()
}
```

```{r, message = FALSE}
# Read in the full company data and reduce it to one row per deputy.
#
# exec_dummy is 1 when the deputy holds any recorded company position and 0
# when the only position is "none". The flag is computed per row and duplicate
# rows are then dropped, instead of returning several rows per group from
# summarize() (deprecated in dplyr 1.1) and relying on select() to re-add the
# grouping column.
results <- merge_results("duma_company_number.csv") %>%
  inner_join(smd, by = "name") %>%
  ungroup() %>%
  mutate(exec_dummy = ifelse(position != "none", 1, 0)) %>%
  # Same row order the grouped summary used to produce.
  arrange(name, discord, party, smd) %>%
  distinct(name, exec_dummy, party, smd, discord) %>%
  # Stay grouped by name, as before, so later chunks receive the same object.
  group_by(name)
```

executive in top-25 or top-500 firm

```{r, echo=FALSE, message=FALSE}
# Executive flag based on the top-25 firm file, one row per deputy and party.
# The flag is computed per row and de-duplicated, rather than returning
# several rows per group from summarize() (deprecated in dplyr 1.1).
elite_company_25 <- merge_results("elite_25.csv") %>%
  inner_join(smd, by = "name") %>%
  ungroup() %>%
  mutate(exec_dummy = ifelse(position != "none", 1, 0)) %>%
  # Same row order the grouped summary used to produce.
  arrange(name, discord, party, smd) %>%
  distinct(name, exec_dummy, party) %>%
  # Stay grouped by name, as before, so later chunks receive the same object.
  group_by(name)
```

```{r, echo=FALSE,  message=FALSE}
# Executive flag based on the top-500 firm file, one row per deputy and party.
# The flag is computed per row and de-duplicated, rather than returning
# several rows per group from summarize() (deprecated in dplyr 1.1).
elite_company_500 <- merge_results("elite_company_number.csv") %>%
  inner_join(smd, by = "name") %>%
  ungroup() %>%
  mutate(exec_dummy = ifelse(position != "none", 1, 0)) %>%
  # Same row order the grouped summary used to produce.
  arrange(name, discord, party, smd) %>%
  distinct(name, exec_dummy, party) %>%
  # Stay grouped by name, as before, so later chunks receive the same object.
  group_by(name)
```

members of budgetary committee

```{r}
# Budget committee members, flagged with budget_committee = 1. Names are
# lower-cased so they line up with the lower-case `name` key of the other
# tables.
budgetary_committee <- tibble(
  name = tolower(c(
    "Макаров Андрей Михайлович",
    "Катасонов Сергей Михайлович",
    "Ремезков Александр Александрович",
    "Симановский Леонид Яковлевич",
    "Хор Глеб Яковлевич",
    "Шурчанов Валентин Сергеевич",
    "Максимова Надежда Сергеевна",
    "Чижов Сергей Викторович",
    "Бикбаев Ильдар Зинурович",
    "Бобрышев Юрий Иванович",
    "Бузилов Валерий Викторович",
    "Ганзя Вера Анатольевна",
    "Гончар Николай Николаевич",
    "Данчикова Галина Иннокентьевна",
    "Зубарев Виктор Владиславович",
    "Ковпак Лев Игоревич",
    "Красноштанов Алексей Николаевич",
    "Носов Александр Алексеевич",
    "Ооржак Мерген Дадар-оолович",
    "Резник Владислав Матусович",
    "Селиверстов Виктор Валентинович",
    "Скруг Валерий Степанович",
    "Фаррахов Айрат Закиевич",
    "Федоров Евгений Алексеевич",
    "Шубин Игорь Николаевич",
    "Щапов Михаил Викторович",
    "Юрков Дмитрий Васильевич"
  )),
  budget_committee = 1
)
```

add controls

```{r, message = FALSE}
# Electoral vote share for SMD deputies. The file is parsed once; the
# vote-share column is taken by position (7th column) and the first comma is
# removed from the candidate name before lower-casing it.
smd_share <- read_csv("smd_share.csv", skip = 1) %>%
  select(name = `кандидат`, vote_share = 7) %>%
  mutate(name = tolower(str_remove(name, ",")))

# Leadership: chairman and deputy chairmen, flagged with dep_chairmen = 1.
chairmen <- tibble(
  name = c("володин вячеслав викторович", "жуков александр дмитриевич",
           "мельников иван иванович", "ананских игорь александрович",
           "гордеев алексей васильевич", "лебедев игорь владимирович",
           "тимофеева ольга викторовна", "толстой петр олегович",
           "яровая ирина анатольевна"),
  dep_chairmen = 1
)

# Committee chairmen, flagged with comm_chairmen = 1.
# The list is lower-cased and de-duplicated before use: two entries are
# capitalised (so they could never match the lower-case `name` key) and two
# names are listed twice (which would duplicate the matching deputy rows in a
# later merge()).
comm_chairmen <- tibble(
  name = unique(tolower(c(
    "кашин владимир иванович", "пискарев василий иванович",
    "макаров андрей михайлович", "плетнева тамара васильевна",
    "крашенинников павел владимирович", "калашников леонид иванович",
    "газзаев валерий георгиевич", "хованская галина петровна",
    "хинштейн александр евсеевич", "савастьянова ольга викторовна",
    "ямпольская елена александровна", "слуцкий леонид эдуардович",
    "шаманов владимир анатольевич", "никонов вячеслав алексеевич",
    "морозов дмитрий анатольевич", "николаев николай петрович",
    "гаврилов сергей анатольевич", "харитонов николай михайлович",
    "москвичев евгений сергеевич", "нилов ярослав евгеньевич",
    "Диденко Алексей Николаевич", "пайкин борис романович",
    "аксаков анатолий геннадьевич", "бурматов владимир владимирович",
    "жигарев сергей александрович", "завальный павел николаевич",
    "сазонов дмитрий валерьевич", "Пискарев Василий Иванович",
    "аршба отари ионович", "гутенев владимир владимирович",
    "макаров андрей михайлович", "гильмутдинов ильдар ирекович"
  ))),
  comm_chairmen = 1
)

# Party leaders, flagged with party_leader = 1.
party_leader <- tibble(
  name = c("неверов сергей иванович", "зюганов геннадий андреевич",
           "жириновский владимир вольфович", "миронов сергей михайлович"),
  party_leader = 1
)
```
-->

table 2

```{r pr}
# Number of deputies per party and mandate type; party_list is the
# complement of the smd flag (1 when smd == 0).
results %>%
  select(name, party, smd) %>%
  group_by(party, smd, party_list = as.numeric(smd == 0)) %>%
  summarise(total = n())
```

figure 1

```{r}
# Box plots of dissent by executive status, one panel per party.
# The former `group` helper column was never mapped to an aesthetic and the
# plot has no legend, so both the column and the legend theme settings are
# dropped.
results %>%
  mutate(exec_dummy = ifelse(exec_dummy == 0, "Non-Executive", "Executive")) %>%
  ggplot(aes(x = exec_dummy, y = discord)) +
  geom_boxplot() +
  facet_wrap(~ party, nrow = 1) +
  labs(x = "", y = "Dissent on 301 Budget Votes") +
  theme_bw() +
  theme(axis.text.x = element_text(color = "black", size = 9),
        axis.title.x = element_text(size = 12),
        axis.text.y = element_text(color = "black", size = 9),
        axis.title.y = element_text(size = 12))
```

table 3

```{r}
# Table 3: dissent regressed on party and executive status, fitted separately
# for SMD deputies and party-list deputies.

# SMD deputies only (smd == 1). `party_list` and `value` are helper columns
# that the models below do not use.
# NOTE(review): as.factor() runs after rowwise() and after the filter; the
# resulting factor level order (and hence lm()'s reference party) may follow
# row order rather than alphabetical order -- confirm before reordering or
# removing any of these steps.
results_three_smd <- results %>%
  rowwise() %>%
  mutate(party_list = ifelse(smd == 0, 1, 0)) %>%
  filter(smd == 1) %>%
  mutate(value = 1,
         party = as.factor(party)) %>%
  select(-c(smd)) 

# Replace every remaining NA in the table with 0.
results_three_smd[is.na(results_three_smd)] <- 0

# Party-list deputies only (party_list == 1, i.e. smd == 0).
results_three_party <- results %>%
  rowwise() %>%
  mutate(party_list = ifelse(smd == 0, 1, 0)) %>%
  filter(party_list == 1) %>%
  mutate(value = 1,
         party = as.factor(party)) %>%
  select(-c(party_list))

# Replace every remaining NA in the table with 0.
results_three_party[is.na(results_three_party)] <- 0

# Linear models of dissent on party and executive status, one per group.
results_three_smd_fit <- lm(discord ~ party + exec_dummy, results_three_smd)

results_three_party_fit <- lm(discord ~ party + exec_dummy, results_three_party)

# Side-by-side regression table with four significance-star thresholds
# (0.001, 0.01, 0.05, 0.1).
# NOTE(review): `results = "asis"` looks like a knitr chunk option rather than
# an export_summs() argument -- confirm it has any effect here.
table_one <- export_summs(results_three_smd_fit,
             results_three_party_fit,
             stars = c(`****` = .001, `***` = 0.01, `**` = 0.05, `*` = 0.1),
             results = "asis",
             model.names = c("SMD Candidates",
                             "Party-List Candidates")) 

table_one
```

models 1, 2, and 3

```{r one_b}
# Three pooled models of dissent: baseline (party + smd + exec_dummy), the
# baseline plus budget-committee membership, and a variant that replaces
# exec_dummy with the two elite-firm flags.
# NOTE(review): as.factor() runs after rowwise() in each pipeline below; the
# factor level order (and hence lm()'s reference party) may follow row order
# rather than alphabetical order -- confirm before restructuring.

# Baseline data; `value` is a helper column the model does not use.
results_two <- results %>%
  rowwise() %>%
  mutate(value = 1,
         party = as.factor(party)) %>%
  clean_names() 

# Replace every remaining NA in the table with 0.
results_two[is.na(results_two)] <- 0

results_two_fit <- 
  lm(discord ~ party + smd + exec_dummy, results_two)

# Baseline data plus the budget-committee flag. all = TRUE makes this a full
# outer join, so committee members missing from `results` are added as extra
# rows; the NA fill below then sets budget_committee to 0 for everyone else.
results_two_bud <- results %>%
  rowwise() %>%
  mutate(value = 1,
         party = as.factor(party)) %>%
  clean_names() %>%
  merge(budgetary_committee, by = "name", all = TRUE)

results_two_bud[is.na(results_two_bud)] <- 0

results_two_fit_bud <- 
  lm(discord ~ party + smd + exec_dummy + budget_committee, results_two_bud)

# Per-deputy elite flags, renamed from exec_dummy so both can be merged in.
elite_dum <- elite_company_25 %>%
  rename(elite_25 = exec_dummy) %>%
  select(name, elite_25)

elite_dum_500 <- elite_company_500 %>%
  rename(elite_500 = exec_dummy) %>%
  select(name, elite_500)

# Baseline data plus both elite flags (full outer joins on name).
results_two_elite_dum <- results %>%
  rowwise() %>%
  mutate(value = 1,
         party = as.factor(party)) %>%
  clean_names() %>%
  merge(elite_dum, by = "name", all = TRUE) %>%
  merge(elite_dum_500, by = "name", all = TRUE)

# Deputies without an elite-firm record get 0 on both flags.
results_two_elite_dum[is.na(results_two_elite_dum)] <- 0

results_two_fit_elite <- 
  lm(discord ~ party + smd + elite_25 + elite_500, results_two_elite_dum)

# Side-by-side regression table with four significance-star thresholds.
# NOTE(review): `results = "asis"` looks like a knitr chunk option rather than
# an export_summs() argument -- confirm it has any effect here.
export_summs(results_two_fit, results_two_fit_bud, results_two_fit_elite,
             stars = c(`****` = .001, `***` = 0.01, `**` = 0.05, `*` = 0.1),
             results = "asis")
```

models 4 and 5

```{r}
# Two models with additional controls: an SMD-only model that adds the
# deputy's vote share, and a pooled model that adds leadership flags.
# NOTE(review): as.factor() runs after rowwise() in both pipelines; the factor
# level order (and hence lm()'s reference party) may follow row order rather
# than alphabetical order -- confirm before restructuring.

# SMD deputies with a known vote share. vote_share arrives as text with a
# decimal comma, so the first comma becomes a point before converting and
# dividing by 100.
results_controls_smd <- results %>%
  rowwise() %>%
  mutate(party_list = ifelse(smd == 0, 1, 0)) %>%
  mutate(value = 1,
         party = as.factor(party)) %>%
  clean_names() %>%
  merge(budgetary_committee, by = "name", all = TRUE) %>%
  merge(smd_share, by = "name", all.x = TRUE) %>%
  mutate(vote_share = str_replace(vote_share, ",", "."),
         vote_share = as.numeric(vote_share) / 100) %>%
  filter(!is.na(vote_share) & smd == 1) %>%
  select(-c(value, party_list, smd))

# Remaining NAs (e.g. budget_committee for non-members) become 0.
results_controls_smd[is.na(results_controls_smd)] <- 0

control_lm_smd <- lm(discord ~ exec_dummy + party + budget_committee 
                     + vote_share, results_controls_smd)

summary(control_lm_smd)

# All deputies with committee and leadership flags attached through full
# outer joins on name.
# NOTE(review): a name listed more than once in any of the merged lookup
# tables would duplicate the matching deputy row here -- confirm the lookup
# tables hold unique names.
results_controls <- results %>%
  rowwise() %>%
  mutate(party = as.factor(party)) %>%
  clean_names() %>%
  merge(budgetary_committee, by = "name", all = TRUE) %>%
  merge(chairmen, by = "name", all = TRUE) %>%
  merge(comm_chairmen, by = "name", all = TRUE) %>%
  merge(party_leader, by = "name", all = TRUE)

# Deputies without a given role get 0 on that flag.
results_controls[is.na(results_controls)] <- 0

control_lm <- lm(discord ~ party + budget_committee + dep_chairmen +
                   comm_chairmen + party_leader + smd + exec_dummy, results_controls)

summary(control_lm)
```

models 6 and 7

```{r}
# Spread `party` into one indicator column per party (snake_case names).
# After the NA fill below each deputy has 1 in their own party column and 0
# in the others.
results_four <- results %>%
  rowwise() %>%
  mutate(value = 1) %>%
  pivot_wider(names_from = "party", values_from = "value") %>%
  clean_names()

results_four <- replace(results_four, is.na(results_four), 0)

# Additive model: three party indicators plus smd and exec_dummy; any party
# column not named in the formula acts as the baseline.
results_four_smd_fit <- lm(
  discord ~ united_russia + liberal_democratic_party + communist_party +
    smd + exec_dummy,
  data = results_four
)

# Same model with the smd x exec_dummy interaction added.
results_four_exec_fit <- lm(
  discord ~ united_russia + liberal_democratic_party + communist_party +
    smd * exec_dummy,
  data = results_four
)

# Side-by-side regression table for both models.
table_four <- export_summs(
  results_four_smd_fit,
  results_four_exec_fit,
  stars = c(`****` = .001, `***` = 0.01, `**` = 0.05, `*` = 0.1),
  results = "asis"
)

table_four
```

figure 2

```{r}
# Figure 2: predicted dissent by party, mandate type and executive status,
# with 95% confidence intervals from the interaction model.

# Kept from the original chunk; the plot below uses dodged points only.
set.seed(10)

# Spread `party` into one indicator column per party (snake_case names).
results_five <- results %>%
  rowwise() %>%
  mutate(value = 1) %>%
  pivot_wider(names_from = "party", values_from = "value") %>%
  clean_names()

results_five[is.na(results_five)] <- 0

# smd * exec_dummy expands to both main effects plus their interaction.
final_fit <- lm(discord ~ united_russia + liberal_democratic_party +
                  communist_party + smd * exec_dummy,
                data = results_five)

# Every observed combination of mandate type, party and executive status.
new_data <- results_five %>%
  select(smd, communist_party, liberal_democratic_party, united_russia,
         exec_dummy) %>%
  unique() %>%
  as.data.frame()

# 95% confidence interval of the fitted mean for each combination.
# se.fit is already the standard error of the predicted mean, so it must not
# be divided by sqrt(cell size) again; predict() supplies the bounds directly.
prediction <- predict(final_fit, new_data, se.fit = TRUE,
                      interval = "confidence", level = 0.95)

new_data$pred <- prediction$fit[, "fit"]
new_data$se <- prediction$se.fit
new_data$ymin <- prediction$fit[, "lwr"]
new_data$ymax <- prediction$fit[, "upr"]

# Long format: one row per combination, labelled by the party whose indicator
# is 1 (rows with none of the three indicators set are dropped).
filtered <- new_data %>%
  pivot_longer(cols = c(communist_party, liberal_democratic_party,
                        united_russia),
               names_to = "party", values_to = "fill") %>%
  filter(fill == 1) %>%
  select(-fill) %>%
  tibble()

# Number of deputies in each party / mandate / executive cell.
ci_data <- results_five %>%
  select(name, exec_dummy, smd, communist_party,
         liberal_democratic_party, united_russia) %>%
  pivot_longer(cols = c(united_russia, communist_party,
                        liberal_democratic_party),
               names_to = "party", values_to = "fill") %>%
  filter(fill == 1) %>%
  group_by(party, smd, exec_dummy) %>%
  summarize(total = n())

# Attach the cell sizes (joined on party, smd and exec_dummy) for reference.
complete <- filtered %>%
  merge(ci_data) %>%
  rename(fit = pred)

loadfonts()

# Shapes are keyed by label, so a missing combination cannot shift the legend.
identity_shapes <- c("PR Executive" = 15, "PR Non-Executive" = 0,
                     "SMD Executive" = 17, "SMD Non-Executive" = 2)

pred_graph <- complete %>%
  mutate(
    identity = factor(
      paste(ifelse(smd == 1, "SMD", "PR"),
            ifelse(exec_dummy == 1, "Executive", "Non-Executive")),
      levels = names(identity_shapes)
    )
  ) %>%
  ggplot(aes(x = party, y = fit, group = identity, shape = identity)) +
  geom_point(position = position_dodge(width = 0.6), size = 3) +
  geom_errorbar(aes(ymin = ymin, ymax = ymax),
                position = position_dodge(width = 0.6),
                width = 0.5) +
  theme_bw() +
  labs(x = "Party", y = "Predicted Dissent on 301 Budget Votes") +
  scale_x_discrete(labels = c(communist_party = "Communist Party",
                              liberal_democratic_party = "Liberal Democrats",
                              united_russia = "United Russia")) +
  scale_shape_manual(name = "Identity", values = identity_shapes)

pred_graph +
  theme(axis.text.x = element_text(color = "black", size = 10),
        axis.title.x = element_text(size = 12),
        axis.text.y = element_text(color = "black", size = 9),
        axis.title.y = element_text(size = 12),
        legend.text = element_text(size = 10),
        legend.title = element_text(size = 12))
```